import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
from datetime import datetime, date
from scipy.stats import pearsonr, spearmanr, zscore
import statsmodels.api as sm
from itertools import combinations, permutations
import warnings
# NOTE(review): this silences every warning for the whole process, including
# pandas/scipy deprecation and numerical warnings raised by the code below.
warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif']=['SimHei'] # use the SimHei font so Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False # render the minus sign correctly on the axes
# Data loading and cleaning
def clean_data(df_name):
    """Fill gaps in a table in place and report the columns that are too sparse.

    A column whose share of missing values is greater than 0.5 is left
    untouched and its label is added to the returned set.  Every other column
    is interpolated with ``Series.interpolate()`` (pandas defaults) and any
    value still missing afterwards is replaced by the column's first valid
    value.

    Args:
        df_name: the ``pandas.DataFrame`` to clean; it is modified in place.

    Returns:
        set: labels of the columns with more than 50% missing values.
    """
    index_count = df_name.shape[0]
    drop_columns = set()
    if index_count == 0:
        # No rows: nothing to fill, and the missing ratio would be 0 / 0.
        return drop_columns
    for column in df_name.columns:
        missing_count = df_name[column].isna().sum()
        if missing_count / index_count > 0.5:
            drop_columns.add(column)
            continue
        # Work on the column and assign it back explicitly.  Calling
        # ``df[col].interpolate(inplace=True)`` acts on a column selection and
        # is not guaranteed to update the parent frame (Copy-on-Write).
        filled = df_name[column].interpolate()
        first_valid = filled.first_valid_index()
        if first_valid is not None:
            filled = filled.fillna(filled.loc[first_valid])
        df_name[column] = filled
    return drop_columns
class DataConstruct:
    """Load the CSV files of a directory into module-level DataFrames.

    Every table is published as a module global (via ``globals()``) under its
    own name, because the backtest functions in this file look their inputs
    up by name.  ``data_space`` holds the names of all published tables.
    """

    # Stock codes that are always excluded.  The set is rebound on the class
    # (not the instance) and grows with every column that ``clean_data``
    # reports as too sparse.
    drop_columns = {'000046.SZ', '002002.SZ', '000666.SZ', '002013.SZ'}

    # Files stored in long format: every column after the first two is
    # pivoted into its own TRADE_DT x S_INFO_WINDCODE table.
    _LONG_FORMAT_FILES = ('RevenueTechnicalFactor1', 'RESSET_FINRATIO')

    def __init__(self, data_path):
        """Read, clean and transform everything found under ``data_path``."""
        self.data_path = data_path
        self.data_space = self.cal_return()

    def read_data(self):
        """Read every ``.csv`` file in ``self.data_path`` into module globals.

        ``TRADE_DT`` is parsed (``%Y/%m/%d`` for ``RESSET_FINRATIO``,
        ``%Y%m%d`` otherwise) and converted to a monthly period.  Long-format
        files are pivoted into one table per value column; every other file
        is indexed by ``TRADE_DT`` and published under the file's base name.

        Returns:
            list[str]: names of the published tables, in load order.
        """
        module_vars = globals()
        data_space = []
        # os.listdir() returns entries in arbitrary order; sort
        # case-insensitively so the load order is the same on every platform.
        for file_name in sorted(os.listdir(self.data_path), key=str.lower):
            df_name, extension = os.path.splitext(file_name)
            if extension != '.csv':
                continue
            frame = pd.read_csv(os.path.join(self.data_path, file_name))
            date_format = '%Y/%m/%d' if df_name == 'RESSET_FINRATIO' else '%Y%m%d'
            trade_dates = pd.to_datetime(frame['TRADE_DT'], format=date_format)
            frame['TRADE_DT'] = trade_dates.dt.to_period('M')
            module_vars[df_name] = frame
            if df_name in self._LONG_FORMAT_FILES:
                for column in frame.columns[2:]:
                    data_space.append(column)
                    # pivot_table aggregates duplicate (date, stock) pairs
                    # with its default aggregation function.
                    module_vars[column] = frame.pivot_table(
                        index='TRADE_DT', columns='S_INFO_WINDCODE', values=column)
            else:
                data_space.append(df_name)
                frame.set_index('TRADE_DT', inplace=True)
        return data_space

    def process_data_get_factors(self):
        """Clean every loaded table and derive the transformed factors.

        All tables are first cleaned with ``clean_data``; the sparse columns
        it reports are merged into ``DataConstruct.drop_columns`` and then
        removed from every table.  Finally ``pe_ttm`` is replaced by its
        reciprocal and ``mv`` by its natural logarithm (only when those
        tables were loaded).

        Returns:
            list[str]: the table names returned by ``read_data``.
        """
        module_vars = globals()
        data_space = self.read_data()
        for df_name in data_space:
            DataConstruct.drop_columns = (
                DataConstruct.drop_columns | clean_data(module_vars[df_name]))
        # Second pass: the drop set is only complete after all tables were
        # inspected, so the same columns are removed everywhere.
        for df_name in data_space:
            frame = module_vars[df_name]
            to_drop = DataConstruct.drop_columns & set(frame.columns)
            frame.drop(columns=sorted(to_drop), inplace=True)
        if 'pe_ttm' in data_space:
            module_vars['pe_ttm'] = 1.0 / module_vars['pe_ttm']
        if 'mv' in data_space:
            module_vars['mv'] = np.log(module_vars['mv'])
        return data_space

    def cal_return(self):
        """Publish ``stock_return`` and ``stock_return_next`` as globals.

        ``stock_return`` is the period-over-period change of the ``close``
        table; ``stock_return_next`` is the same table shifted one row up, so
        each row carries the return of the following period.

        Returns:
            list[str]: ``data_space`` extended with the two return tables.
        """
        global stock_return, stock_return_next
        data_space = self.process_data_get_factors()
        stock_return = globals()['close'].pct_change()
        stock_return_next = stock_return.shift(periods=-1)
        data_space.extend(['stock_return', 'stock_return_next'])
        return data_space

    def write_code_list(self):
        """Write the column labels of ``pe_ttm`` to ``code_list1.csv``."""
        df = pd.DataFrame(globals()['pe_ttm'].columns.tolist())
        df.to_csv('code_list1.csv', index=False, header=False)
        return 0

    def print_all(self):
        """Print the shape and the top-left 5 x 5 corner of every table."""
        module_vars = globals()
        for df_name in self.data_space:
            frame = module_vars[df_name]
            print('{%s} (%d * %d):' % (df_name, frame.shape[0], frame.shape[1]))
            print(frame.iloc[:5, :5])
            print('\n')
# Single factor: quantile-group backtest
def backtest_group(factor, bins_n):
    """Run and plot a quantile-group backtest for one factor.

    At every date after the first, stocks are split into ``bins_n`` groups
    with ``pd.qcut`` on the factor values of the previous date.  Each group's
    return is the mean of ``stock_return`` over its members on that date, and
    the group value compounds those returns starting from 1.  The benchmark
    column is the mean of the whole ``stock_return`` row.

    The intermediate tables are published as module globals named
    ``<factor>_t``, ``<factor>_t_groups``, ``<factor>_groups_return`` and
    ``<factor>_groups_value``.

    Args:
        factor: name of a module-level factor table (dates x stocks).
        bins_n: number of quantile groups.

    Returns:
        int: always 0 (also when the factor table does not exist).
    """
    module_vars = globals()
    if factor not in module_vars:
        print("因子" + factor + "不存在可用数据,无法回测")
        return 0

    by_stock = module_vars[factor].T
    module_vars[factor + '_t'] = by_stock
    quantile_labels = ['第' + str(rank) + '分位' for rank in range(1, bins_n + 1)]
    result_columns = quantile_labels + ['基准组合']

    membership = pd.DataFrame(index=by_stock.index)
    group_return = pd.DataFrame(index=by_stock.columns, columns=result_columns)
    group_value = pd.DataFrame(index=by_stock.columns, columns=result_columns)
    module_vars[factor + '_t_groups'] = membership
    module_vars[factor + '_groups_return'] = group_return
    module_vars[factor + '_groups_value'] = group_value
    group_value.iloc[0] = 1.0000

    dates = by_stock.columns
    # Pair every date with its predecessor: groups are formed on the previous
    # date's factor values and earn the current date's return.
    for prev_date, trade_date in zip(dates[:-1], dates[1:]):
        period_returns = module_vars['stock_return'].loc[trade_date]
        membership[trade_date] = pd.qcut(by_stock[prev_date], bins_n, labels=quantile_labels)
        for label in quantile_labels:
            members = list(membership[membership[trade_date] == label].index)
            group_return.loc[trade_date, label] = period_returns[members].mean()
            group_value.loc[trade_date, label] = (
                group_value.loc[prev_date, label] * (1 + group_return.loc[trade_date, label]))
        group_return.loc[trade_date, '基准组合'] = period_returns.mean()
        group_value.loc[trade_date, '基准组合'] = (
            group_value.loc[prev_date, '基准组合'] * (1 + group_return.loc[trade_date, '基准组合']))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10,5), gridspec_kw={'left':0.06, 'bottom':0.14, 'right':0.965})
    for axis, (tp, table) in zip(axes, (('return', group_return), ('value', group_value))):
        table.plot(ax=axis, kind='line')
        axis.set_title(factor + f'因子分组回测(因子组合{tp})')
    fig.suptitle(factor + '因子分组测试')
    plt.show()
    return 0
# Single factor: industry / market-cap neutralization and IC backtest
def factor_zscore_neutral(factor):
    """Return the factor after z-scoring and industry / size neutralization.

    For every date the factor is z-scored across stocks and regressed (OLS
    with a constant) on the ``mv`` table and on industry dummies built from
    ``industry.csv``; the regression residuals are the neutralized factor.
    When ``factor`` is ``'mv'`` itself only the industry dummies are used.

    Side effect: the industry table read from ``industry.csv`` is published
    as the module global ``industry``.

    Args:
        factor: name of a module-level factor table (dates x stocks).

    Returns:
        pandas.DataFrame: float residuals indexed by date, with one column
        per stock that appears both in the factor table and in the industry
        table.
    """
    global industry
    module_vars = globals()
    raw_factor = module_vars[factor]

    # Z-score each row on the raw array and rebuild the frame explicitly:
    # whether DataFrame.apply(zscore, axis=1) yields a DataFrame depends on
    # the return type of zscore, which differs between scipy versions.
    factor_data = pd.DataFrame(
        zscore(raw_factor.to_numpy(dtype=float), axis=1),
        index=raw_factor.index,
        columns=raw_factor.columns,
    )
    # Always align market cap to the factor's stocks by label.
    mv_data = module_vars['mv'][raw_factor.columns]

    industry = pd.read_csv('industry.csv')
    industry.set_index('stock_code', inplace=True)
    industry.drop(columns=['stock_name', 'sw_industry_code'], inplace=True)
    industry_dummy = pd.get_dummies(industry['sw_industry_name'], drop_first=True)

    # Only stocks with an industry record can enter the regression.
    stocks = raw_factor.columns[raw_factor.columns.isin(industry_dummy.index)]
    dummies = industry_dummy.loc[stocks].astype(float)

    factor_neutral = pd.DataFrame(index=factor_data.index, columns=stocks, dtype=float)
    for trade_date in factor_data.index:
        X = dummies.copy()
        if factor != 'mv':
            X.insert(0, 'mv', mv_data.loc[trade_date, stocks].to_numpy(dtype=float))
        X = sm.add_constant(X)
        y = factor_data.loc[trade_date, stocks].astype(float)
        model = sm.OLS(y, X.astype(float)).fit()
        # Write residuals back by stock label, not by position.
        factor_neutral.loc[trade_date] = model.resid.reindex(stocks).to_numpy()
    return factor_neutral
def backtest_icir(factor):
    """Compute, print and plot the IC series of a neutralized factor.

    For every date except the last one, the Pearson ("normal") and Spearman
    ("rank") correlations between the neutralized factor and
    ``stock_return_next`` are stored in the module global ``<factor>_ic``.
    Mean, standard deviation, their ratio (ICIR) and the share of positive
    values are printed for both series, and both series are plotted on twin
    axes.

    Args:
        factor: name of a module-level factor table.

    Returns:
        int: always 0.
    """
    module_vars = globals()
    factor_data = factor_zscore_neutral(factor)
    # The last row of each table is removed before the correlations are
    # computed.
    factor_data = factor_data.drop(index=factor_data.index[-1])
    all_next_returns = module_vars['stock_return_next']
    next_return = all_next_returns.drop(index=all_next_returns.index[-1])

    ic_table = pd.DataFrame(index=factor_data.index, columns=['normal_ic', 'rank_ic'])
    module_vars[factor + '_ic'] = ic_table
    for trade_date in factor_data.index:
        exposures = factor_data.loc[trade_date]
        forward_returns = next_return.loc[trade_date, factor_data.columns]
        normal_ic, _ = pearsonr(exposures, forward_returns)
        ic_table.loc[trade_date, 'normal_ic'] = normal_ic
        rank_ic, _ = spearmanr(exposures, forward_returns)
        ic_table.loc[trade_date, 'rank_ic'] = rank_ic

    print(f'【{factor}因子IC分析】')
    for ttype in ['normal', 'rank']:
        ic_series = ic_table[f'{ttype}_ic']
        ic_mean = ic_series.mean()
        ic_std = ic_series.std()
        positive_share = len(ic_series[ic_series > 0]) / len(ic_series)
        print(
            f'{ttype}_ic均值:' + str(round(ic_mean, 4))
            + f',{ttype}_ic标准差:' + str(round(ic_std, 4))
            + f',{ttype}_icir:' + str(round(ic_mean / ic_std, 4))
            + ',IC>0占比:' + str(round(positive_share, 4))
        )

    fig, ax1 = plt.subplots(figsize=(10,4))
    ax1.plot(ic_table.index.to_timestamp(), ic_table['normal_ic'], color='b')
    ax1.set_xlabel('TRADE_DT')
    ax1.set_ylabel('normal_ic', color='b')
    ax1.axhline(y=0, color='b', linestyle='--')
    # Second y-axis so both series keep their own scale.
    ax2 = ax1.twinx()
    ax2.plot(ic_table.index.to_timestamp(), ic_table['rank_ic'], color='r')
    ax2.set_ylabel('rank_ic', color='r')
    ax2.axhline(y=0, color='r', linestyle='--')
    plt.title(factor + '因子回测(ic时间序列变化图)')
    plt.show()
    return 0
# Multi-factor: cross-grouping (double-sort) backtest
def dual_factor_cross_test(factor1, factor2, bins_n):
    """Run and plot a sequential double-sort backtest of two factors.

    At every common date after the first, stocks are split into ``bins_n``
    groups on the previous date's ``factor1`` values; inside each of those
    groups they are split again into ``bins_n`` groups on the previous date's
    ``factor2`` values.  Every sub-group's return is the mean of
    ``stock_return`` over its members, its value compounds those returns from
    1, and the benchmark column is the mean return of the whole ``factor1``
    group.

    The two factors are aligned on the dates and stocks they share.  The
    alignment is done on local copies, so the shared ``<factor>_t`` globals
    are never modified.  Results are published as the module globals
    ``res_return_<factor1>_<factor2>`` and ``res_value_<factor1>_<factor2>``
    (dicts mapping each ``factor1`` group label to a DataFrame).

    Args:
        factor1: name of the factor used for the first sort.
        factor2: name of the factor used for the second sort.
        bins_n: number of groups in each sort.

    Returns:
        int: always 0 (also when one of the factor tables does not exist).

    Raises:
        ValueError: if the two factors share no date or no stock.
    """
    module_vars = globals()
    # Run the single-factor test first when it has not been run yet; this
    # check has to happen before any factor data is touched.
    for name in (factor1, factor2):
        if name + '_t' not in module_vars:
            backtest_group(name, bins_n)
    if factor1 not in module_vars or factor2 not in module_vars:
        return 0

    first_t = module_vars[factor1].T
    second_t = module_vars[factor2].T
    # Label-based alignment keeps the order of factor1 and works for any mix
    # of reporting frequencies without a hard-coded list of factor names.
    dates = first_t.columns[first_t.columns.isin(second_t.columns)]
    codes = first_t.index[first_t.index.isin(second_t.index)]
    if len(dates) == 0 or len(codes) == 0:
        raise ValueError(
            'factors %r and %r share no common dates or stocks' % (factor1, factor2))
    first_t = first_t.loc[codes, dates]
    second_t = second_t.loc[codes, dates]

    labels1 = [factor1 + str(rank) for rank in range(1, bins_n + 1)]
    labels2 = [factor2 + str(rank) for rank in range(1, bins_n + 1)]
    result_columns = labels2 + ['基准组合']
    returns_by_group = {
        label: pd.DataFrame(index=dates, columns=result_columns, dtype=float)
        for label in labels1
    }
    values_by_group = {
        label: pd.DataFrame(index=dates, columns=result_columns, dtype=float)
        for label in labels1
    }
    for table in values_by_group.values():
        table.iloc[0] = 1.0000
    module_vars['res_return_' + factor1 + '_' + factor2] = returns_by_group
    module_vars['res_value_' + factor1 + '_' + factor2] = values_by_group

    for prev_date, trade_date in zip(dates[:-1], dates[1:]):
        period_returns = module_vars['stock_return'].loc[trade_date]
        first_bins = pd.qcut(first_t[prev_date], bins_n, labels=labels1)
        for label1 in labels1:
            members1 = first_bins[first_bins == label1].index.tolist()
            second_bins = pd.qcut(second_t.loc[members1, prev_date], bins_n, labels=labels2)
            return_table = returns_by_group[label1]
            value_table = values_by_group[label1]
            for label2 in labels2:
                members2 = second_bins[second_bins == label2].index.tolist()
                group_return = period_returns.loc[members2].mean()
                return_table.loc[trade_date, label2] = group_return
                value_table.loc[trade_date, label2] = (
                    value_table.loc[prev_date, label2] * (1 + group_return))
            benchmark_return = period_returns.loc[members1].mean()
            return_table.loc[trade_date, '基准组合'] = benchmark_return
            value_table.loc[trade_date, '基准组合'] = (
                value_table.loc[prev_date, '基准组合'] * (1 + benchmark_return))

    # squeeze=False keeps ``axes`` two-dimensional even when bins_n == 1.
    fig, axes = plt.subplots(nrows=2, ncols=bins_n, figsize=(10,6), squeeze=False, gridspec_kw={'left':0.036, 'bottom': 0.105, 'right':0.983, 'top': 0.897, 'wspace':0.145, 'hspace': 0.455})
    for row, (tp, tables) in enumerate((('return', returns_by_group), ('value', values_by_group))):
        for col, label1 in enumerate(labels1):
            tables[label1].plot(ax=axes[row, col], kind='line')
            axes[row, col].set_title(label1 + f'因子组合{tp}时间序列变化图')
    fig.suptitle(factor1 + '因子&' + factor2 + '因子交叉分组测试')
    plt.show()
    return 0
# Multi-factor: correlation coefficients
def get_corr(factor1, factor2):
    """Return the time-averaged cross-sectional correlation of two factors.

    For every date present in both ``<factor1>_t`` and ``<factor2>_t`` the
    Pearson correlation is computed across the stocks both tables share; the
    mean over dates is written symmetrically into the module global
    ``corr_matrix``, printed and returned.

    Args:
        factor1: name of the first factor (``<factor1>_t`` must exist).
        factor2: name of the second factor (``<factor2>_t`` must exist).

    Returns:
        float: mean correlation coefficient; NaN when there is no common
        date.
    """
    module_vars = globals()
    first_t = module_vars[factor1 + '_t']
    second_t = module_vars[factor2 + '_t']
    # Ordered label intersections keep the computation deterministic.
    date_list = first_t.columns[first_t.columns.isin(second_t.columns)]
    code_list = first_t.index[first_t.index.isin(second_t.index)]

    coefficients = []
    for trade_date in date_list:
        result = pearsonr(first_t.loc[code_list, trade_date],
                          second_t.loc[code_list, trade_date])
        # pearsonr returns (statistic, p-value); only the statistic may enter
        # the average.
        coefficients.append(result[0])
    mean_corr = np.mean(coefficients) if coefficients else np.nan

    module_vars['corr_matrix'].loc[factor1, factor2] = mean_corr
    module_vars['corr_matrix'].loc[factor2, factor1] = mean_corr
    print(factor1 + '因子&' + factor2 + '因子相关系数: ' + str(round(mean_corr, 4)))
    return mean_corr
# Backtest of the whole factor library
def test_n_corsstest(factor_to_test, bins_n_single, bins_n_dual):
    """Run every single-factor and pairwise test and plot the correlations.

    Each factor goes through ``backtest_group`` and ``backtest_icir``.  Each
    unordered pair of factors goes through ``get_corr`` and through
    ``dual_factor_cross_test`` in both sort orders.  Finally the module
    global ``corr_matrix`` is converted to float and drawn as a heat map.

    Args:
        factor_to_test: list of factor names (module-level table names).
        bins_n_single: number of groups for the single-factor backtest.
        bins_n_dual: number of groups per sort in the double-sort backtest.
    """
    module_vars = globals()
    # get_corr writes into the global ``corr_matrix``; create it here when
    # the caller has not prepared one, instead of failing with a KeyError.
    if 'corr_matrix' not in module_vars:
        module_vars['corr_matrix'] = pd.DataFrame(index=factor_to_test, columns=factor_to_test)

    for factor in factor_to_test:
        backtest_group(factor, bins_n_single)
        backtest_icir(factor)

    for first, second in combinations(factor_to_test, 2):
        get_corr(first, second)
        # The double sort is order dependent, so run it both ways.
        dual_factor_cross_test(first, second, bins_n_dual)
        dual_factor_cross_test(second, first, bins_n_dual)

    # The matrix is created without a dtype; seaborn needs numeric data.
    module_vars['corr_matrix'] = module_vars['corr_matrix'].astype(float)
    plt.figure(figsize=(10, 8))
    sns.heatmap(module_vars['corr_matrix'], annot=True, cmap='YlGnBu', fmt=".2f", square=True)
    plt.title('因子相关系数矩阵')
    plt.xlabel('Factors')
    plt.ylabel('Factors')
    plt.show()
# Build the factor database: constructing DataConstruct reads, cleans and
# publishes every table found under 'div_datas/' as a module global.
dt = DataConstruct('div_datas/')
# Print the shape and the top-left corner of every loaded table.
dt.print_all()
{adj_close} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 566.95 920.12 116.76 65.03 21.76
2011-01 549.71 917.88 119.11 68.87 22.33
2011-02 571.98 913.40 120.95 84.85 24.08
2011-03 577.36 972.73 131.34 70.54 24.62
2011-04 653.48 949.22 130.16 67.75 29.59
{close} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 15.79 8.22 6.97 16.77 6.83
2011-01 15.31 8.20 7.11 17.76 7.01
2011-02 15.93 8.16 7.22 21.88 7.56
2011-03 16.08 8.69 7.84 18.19 7.73
2011-04 18.20 8.48 7.77 17.47 9.29
{div_12m} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.7599 0.8516 1.1478 0.2087 3.7258
2011-01 0.7599 0.8537 1.1252 0.1971 3.7258
2011-02 0.7599 0.8578 1.1080 0.1600 3.7258
2011-03 0.7599 0.8055 1.0204 0.1924 3.7258
2011-04 0.7599 0.8255 1.0296 0.2003 3.7258
{mv} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 24.731115 25.227296 22.391463 23.629723 22.127041
2011-01 24.700244 25.224860 22.411350 23.687081 22.153054
2011-02 24.739942 25.219970 22.426703 23.895705 22.228587
2011-03 24.749314 25.282899 22.509087 23.711004 22.250825
2011-04 24.873159 25.258436 22.500118 23.670617 22.434655
{pe_ttm} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.111347 0.062448 0.053597 0.017136 0.017665
2011-01 0.114838 0.062601 0.088819 0.016181 0.017212
2011-02 0.113189 0.062908 0.087466 0.013134 0.015960
2011-03 0.112133 0.076225 0.080737 0.016404 0.015609
2011-04 0.112064 0.078955 0.083926 0.018215 0.031607
{ROETTM} (53 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 18.7505 16.4655 19.5847 13.0475 20.0185
2011-03 19.6679 16.1679 19.8346 13.2868 39.3927
2011-06 20.8248 16.1207 20.1171 13.2218 29.6570
2011-09 13.2202 16.1956 23.2231 11.5363 29.3072
2011-12 14.0206 18.1712 15.5064 10.1887 22.7792
{CurRt} (53 * 1925):
S_INFO_WINDCODE 000002.SZ 000006.SZ 000009.SZ 000011.SZ 000012.SZ
TRADE_DT
2010-12 1.5852 2.0348 2.0401 1.3145 0.7394
2011-03 1.4798 1.8241 2.2538 1.5556 0.8402
2011-06 1.4472 1.7249 2.4856 1.3432 0.6838
2011-09 1.3965 1.5310 2.1409 1.3008 0.6540
2011-12 1.4081 1.3927 1.8372 1.2624 0.6493
{NPPCCutGrRt} (53 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 24.3779 33.4852 110.8195 67.7180 45.9244
2011-03 51.9121 10.0150 218.7476 -51.1087 53264.5773
2011-06 58.5081 11.0732 70.9762 -20.3746 154.1927
2011-09 63.3194 14.3419 149.0837 4.9186 143.6999
2011-12 65.9062 36.9758 -5.3197 -1.9106 60.6875
{TotAstTRtTTM} (53 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.0274 0.2871 0.2953 0.3671 0.3456
2011-03 0.0276 0.2667 0.3234 0.3104 0.6348
2011-06 0.0293 0.2559 0.3407 0.3518 0.4964
2011-09 0.0271 0.2447 0.3789 0.3325 0.4647
2011-12 0.0299 0.2805 0.3066 0.3728 0.4393
{NetOCFTOReve} (53 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 120.6634 4.4115 46.4706 -18.2278 -10.2478
2011-03 367.0922 -24.7823 61.4355 -36.0902 -39.5073
2011-06 260.0689 19.1399 73.0443 -14.0752 -38.6337
2011-09 86.2822 -5.1022 58.3608 -26.8802 -32.9223
2011-12 -48.7108 4.7218 46.2896 -20.3717 -25.6621
{CurTotLia} (53 * 1925):
S_INFO_WINDCODE 000002.SZ 000006.SZ 000009.SZ 000011.SZ 000012.SZ
TRADE_DT
2010-12 80.5028 62.3775 56.0464 84.5849 49.0328
2011-03 84.7000 71.4818 51.9318 83.3135 51.6812
2011-06 84.9579 76.4470 47.8392 94.3155 51.6158
2011-09 87.0344 89.5555 55.1359 95.0460 53.8889
2011-12 87.8920 92.8506 62.2953 94.5521 56.6879
{OPITPrf} (53 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 92.3867 93.2270 95.5654 48.9114 92.4922
2011-03 93.2695 99.3269 99.9263 42.4872 99.9312
2011-06 91.5557 98.5198 94.6570 62.4365 99.7979
2011-09 93.9326 97.7823 97.0782 63.5321 99.9971
2011-12 94.8669 95.3213 98.0730 61.1790 100.3475
{S_RISK_VARIANCE20} (159 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.043705 0.241605 0.484243 0.230277 0.205580
2011-01 0.087365 0.211352 0.309248 0.495517 0.441427
2011-02 0.041962 0.063655 0.059765 0.747305 0.067242
2011-03 0.041182 0.061819 0.081115 0.365652 0.214133
2011-04 0.111408 0.059983 0.144665 0.352050 0.163832
{S_RISK_VARIANCE60} (159 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.109308 0.201698 0.331925 0.480538 0.162898
2011-01 0.081300 0.205505 0.381887 0.473697 0.276363
2011-02 0.058572 0.167685 0.331332 0.468365 0.240370
2011-03 0.056510 0.111077 0.150993 0.485600 0.233130
2011-04 0.063085 0.054470 0.095470 0.508748 0.139095
{S_RISK_LOSSVARIANCE20} (159 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.053823 0.077690 0.162115 0.111925 0.113577
2011-01 0.028297 0.077450 0.168522 0.120645 0.116145
2011-02 0.034408 0.084615 0.110595 0.160458 0.114918
2011-03 0.032953 0.054705 0.083030 0.172730 0.026665
2011-04 0.020040 0.024795 0.040067 0.099190 0.040485
{S_RISK_LOSSVARIANCE60} (159 * 1972):
S_INFO_WINDCODE 000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 0.061472 0.056778 0.119095 0.119423 0.082443
2011-01 0.041357 0.069182 0.129263 0.117492 0.102078
2011-02 0.042513 0.070825 0.128447 0.126650 0.104862
2011-03 0.035355 0.066420 0.113492 0.125372 0.099017
2011-04 0.034785 0.062015 0.108815 0.118875 0.092660
{stock_return} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 NaN NaN NaN NaN NaN
2011-01 -0.030399 -0.002433 0.020086 0.059034 0.026354
2011-02 0.040496 -0.004878 0.015471 0.231982 0.078459
2011-03 0.009416 0.064951 0.085873 -0.168647 0.022487
2011-04 0.131841 -0.024166 -0.008929 -0.039582 0.201811
{stock_return_next} (159 * 1972):
000001.SZ 000002.SZ 000006.SZ 000009.SZ 000011.SZ
TRADE_DT
2010-12 -0.030399 -0.002433 0.020086 0.059034 0.026354
2011-01 0.040496 -0.004878 0.015471 0.231982 0.078459
2011-02 0.009416 0.064951 0.085873 -0.168647 0.022487
2011-03 0.131841 -0.024166 -0.008929 -0.039582 0.201811
2011-04 -0.029670 -0.075472 -0.092664 -0.071551 0.139935
# Run the full backtest
factor_to_test = ['div_12m','pe_ttm','mv','S_RISK_VARIANCE60','ROETTM','NPPCCutGrRt','OPITPrf','NetOCFTOReve','CurTotLia','CurRt','TotAstTRtTTM']
# get_corr writes its results into this global matrix, so create it first.
globals()['corr_matrix'] = pd.DataFrame(index=factor_to_test, columns=factor_to_test)
# 5 groups for the single-factor tests, 3 groups per sort for the double sorts.
test_n_corsstest(factor_to_test, 5, 3)
【div_12m因子IC分析】 normal_ic均值:0.0229,normal_ic标准差:0.0552,normal_icir:0.4143,IC>0占比:0.6709 rank_ic均值:0.0375,rank_ic标准差:0.0763,rank_icir:0.4908,IC>0占比:0.7025
【pe_ttm因子IC分析】 normal_ic均值:0.0152,normal_ic标准差:0.0645,normal_icir:0.2363,IC>0占比:0.6013 rank_ic均值:0.0443,rank_ic标准差:0.0855,rank_icir:0.5177,IC>0占比:0.7025
【mv因子IC分析】 normal_ic均值:-0.0382,normal_ic标准差:0.1307,normal_icir:-0.2926,IC>0占比:0.3354 rank_ic均值:-0.0416,rank_ic标准差:0.1552,rank_icir:-0.2677,IC>0占比:0.3544
【S_RISK_VARIANCE60因子IC分析】 normal_ic均值:-0.0282,normal_ic标准差:0.0948,normal_icir:-0.297,IC>0占比:0.3861 rank_ic均值:-0.0489,rank_ic标准差:0.1113,rank_icir:-0.4396,IC>0占比:0.3291
【ROETTM因子IC分析】 normal_ic均值:0.017,normal_ic标准差:0.0614,normal_icir:0.2763,IC>0占比:0.7115 rank_ic均值:0.0432,rank_ic标准差:0.1217,rank_icir:0.3549,IC>0占比:0.6923
【NPPCCutGrRt因子IC分析】 normal_ic均值:0.0191,normal_ic标准差:0.0276,normal_icir:0.6914,IC>0占比:0.7308 rank_ic均值:0.0495,rank_ic标准差:0.0789,rank_icir:0.627,IC>0占比:0.7692
【OPITPrf因子IC分析】 normal_ic均值:0.0079,normal_ic标准差:0.0264,normal_icir:0.2994,IC>0占比:0.6346 rank_ic均值:0.0147,rank_ic标准差:0.0845,rank_icir:0.174,IC>0占比:0.5192
【NetOCFTOReve因子IC分析】 normal_ic均值:0.0175,normal_ic标准差:0.026,normal_icir:0.673,IC>0占比:0.7885 rank_ic均值:0.0297,rank_ic标准差:0.0607,rank_icir:0.4897,IC>0占比:0.7115
【CurTotLia因子IC分析】 normal_ic均值:-0.0038,normal_ic标准差:0.0359,normal_icir:-0.1072,IC>0占比:0.4615 rank_ic均值:-0.0051,rank_ic标准差:0.0464,rank_icir:-0.1099,IC>0占比:0.4615
【CurRt因子IC分析】 normal_ic均值:-0.0141,normal_ic标准差:0.0569,normal_icir:-0.2478,IC>0占比:0.4423 rank_ic均值:-0.0189,rank_ic标准差:0.0651,rank_icir:-0.2905,IC>0占比:0.3462
【TotAstTRtTTM因子IC分析】 normal_ic均值:0.0116,normal_ic标准差:0.0345,normal_icir:0.3372,IC>0占比:0.6731 rank_ic均值:0.0268,rank_ic标准差:0.0829,rank_icir:0.323,IC>0占比:0.6538
div_12m因子&pe_ttm因子相关系数: 0.1783
div_12m因子&mv因子相关系数: 0.1123
div_12m因子&S_RISK_VARIANCE60因子相关系数: -0.0635
div_12m因子&ROETTM因子相关系数: 0.1091
div_12m因子&NPPCCutGrRt因子相关系数: 0.2291
div_12m因子&OPITPrf因子相关系数: 0.1262
div_12m因子&NetOCFTOReve因子相关系数: 0.1159
div_12m因子&CurTotLia因子相关系数: 0.0003
div_12m因子&CurRt因子相关系数: 0.025
div_12m因子&TotAstTRtTTM因子相关系数: 0.1741
pe_ttm因子&mv因子相关系数: 0.1467
pe_ttm因子&S_RISK_VARIANCE60因子相关系数: -0.0551
pe_ttm因子&ROETTM因子相关系数: 0.1333
pe_ttm因子&NPPCCutGrRt因子相关系数: 0.2229
pe_ttm因子&OPITPrf因子相关系数: 0.0613
pe_ttm因子&NetOCFTOReve因子相关系数: 0.1215
pe_ttm因子&CurTotLia因子相关系数: -0.0393
pe_ttm因子&CurRt因子相关系数: 0.0688
pe_ttm因子&TotAstTRtTTM因子相关系数: 0.0907
mv因子&S_RISK_VARIANCE60因子相关系数: 0.0229
mv因子&ROETTM因子相关系数: 0.1596
mv因子&NPPCCutGrRt因子相关系数: 0.1827
mv因子&OPITPrf因子相关系数: 0.2091
mv因子&NetOCFTOReve因子相关系数: 0.0921
mv因子&CurTotLia因子相关系数: -0.0874
mv因子&CurRt因子相关系数: -0.0545
mv因子&TotAstTRtTTM因子相关系数: 0.1369
S_RISK_VARIANCE60因子&ROETTM因子相关系数: 0.1747
S_RISK_VARIANCE60因子&NPPCCutGrRt因子相关系数: 0.2645
S_RISK_VARIANCE60因子&OPITPrf因子相关系数: 0.2204
S_RISK_VARIANCE60因子&NetOCFTOReve因子相关系数: 0.1824
S_RISK_VARIANCE60因子&CurTotLia因子相关系数: 0.0886
S_RISK_VARIANCE60因子&CurRt因子相关系数: 0.1379
S_RISK_VARIANCE60因子&TotAstTRtTTM因子相关系数: 0.1811
ROETTM因子&NPPCCutGrRt因子相关系数: 0.2745
ROETTM因子&OPITPrf因子相关系数: 0.1998
ROETTM因子&NetOCFTOReve因子相关系数: 0.2177
ROETTM因子&CurTotLia因子相关系数: 0.1532
ROETTM因子&CurRt因子相关系数: 0.2404
ROETTM因子&TotAstTRtTTM因子相关系数: 0.1677
NPPCCutGrRt因子&OPITPrf因子相关系数: 0.2325
NPPCCutGrRt因子&NetOCFTOReve因子相关系数: 0.283
NPPCCutGrRt因子&CurTotLia因子相关系数: 0.2666
NPPCCutGrRt因子&CurRt因子相关系数: 0.3317
NPPCCutGrRt因子&TotAstTRtTTM因子相关系数: 0.2623
OPITPrf因子&NetOCFTOReve因子相关系数: 0.1892
OPITPrf因子&CurTotLia因子相关系数: 0.1872
OPITPrf因子&CurRt因子相关系数: 0.2145
OPITPrf因子&TotAstTRtTTM因子相关系数: 0.1456
NetOCFTOReve因子&CurTotLia因子相关系数: 0.0783
NetOCFTOReve因子&CurRt因子相关系数: 0.1642
NetOCFTOReve因子&TotAstTRtTTM因子相关系数: 0.172
CurTotLia因子&CurRt因子相关系数: 0.2029
CurTotLia因子&TotAstTRtTTM因子相关系数: 0.1356
CurRt因子&TotAstTRtTTM因子相关系数: -0.0033